Dream | coef = age.

Dream analysis

To use cause_of_death in the model, I got the NAs and change it for “Other” category.

To use C1-C4 I calculated the median for the missing samples.

params = BiocParallel::MulticoreParam(workers=10, progressbar=T)
register(params)
registerDoParallel(10)

metadata$cause_of_death_categories[metadata$cause_of_death_categories %in% NA] <- "Other"
#table(metadata$cause_of_death_categories)

metadata$C1 = metadata$C1 %>% replace_na(median(metadata$C1, na.rm = T))
metadata$C2 = metadata$C2 %>% replace_na(median(metadata$C2, na.rm = T))
metadata$C3 = metadata$C3 %>% replace_na(median(metadata$C3, na.rm = T))
metadata$C4 = metadata$C4 %>% replace_na(median(metadata$C4, na.rm = T))

# Matching the names from the DE analysis to use the same code 
genes_counts4deg = genes_counts_exp_3rd
metadata4deg = metadata
# all(colnames(genes_counts_exp_3rd) == metadata$donor_tissue) # Check the order of columns - TRUE 

# The dream model operates directly on the results of voom. 
# The only change compared to the standard limma workflow is to replace lmFit with dream. 

# Check variance partition version 
# packageVersion("variancePartition")  # Must be 1.17.7

# The variable to be tested should be a fixed effect
form <- ~ sex + (1|donor_id) + (1|cause_of_death_categories) + C1 + C2 + C3 + C4 + picard_pct_mrna_bases + picard_summed_median + picard_pct_ribosomal_bases + age * tissue

# estimate weights using linear mixed model of dream
vobjDream = suppressWarnings( voomWithDreamWeights( genes_counts4deg, form, metadata4deg ) ) # supressing messages because of Biocparallel was generating a lot of messages  
 
# Fit the dream model on each gene
# By default, uses the Satterthwaite approximation for the hypothesis test
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "MFG")
fitmm_MFGref = suppressWarnings (dream( vobjDream, form, metadata4deg )) 
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "STG")
fitmm_STGref = suppressWarnings (dream( vobjDream, form, metadata4deg )) 
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "SVZ")
fitmm_SVZref = suppressWarnings (dream( vobjDream, form, metadata4deg ))
metadata4deg$tissue = relevel(metadata4deg$tissue, ref = "THA")
fitmm_THAref = suppressWarnings (dream( vobjDream, form, metadata4deg )) 

save(vobjDream, fitmm_MFGref,fitmm_STGref,fitmm_SVZref,fitmm_THAref, metadata4deg, genes_counts4deg,  file = paste0(work_plots,"/age_interaction_dreamObj.RData"))

DE genes with interaction term

adj.P.Val<0.01

load(paste0(work_plots,"/age_interaction_dreamObj.RData"))

MFG_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_MFGref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
MFG_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_MFGref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
MFG_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_MFGref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])

STG_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_STGref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
STG_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_STGref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
STG_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_STGref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])

SVZ_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_SVZref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
SVZ_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_SVZref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
SVZ_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_SVZref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])

THA_0.01 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_THAref, adjust.method = "BH", p.value = 0.01))))[c(1,3),])
THA_0.05 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_THAref, adjust.method = "BH", p.value = 0.05))))[c(1,3),])
THA_0.10 = colSums(as.data.frame(unclass(summary(decideTests(fitmm_THAref, adjust.method = "BH", p.value = 0.10))))[c(1,3),])


df_res_0.01 = data.frame(MFG = c(NA,MFG_0.01["age:tissueSTG"],MFG_0.01["age:tissueSVZ"],MFG_0.01["age:tissueTHA"]),
                         STG = c(STG_0.01["age:tissueMFG"],NA,STG_0.01["age:tissueSVZ"],STG_0.01["age:tissueTHA"]),
                         SVZ = c(SVZ_0.01["age:tissueMFG"],SVZ_0.01["age:tissueSTG"],NA,SVZ_0.01["age:tissueTHA"]),
                         THA = c(THA_0.01["age:tissueMFG"],THA_0.01["age:tissueSTG"],THA_0.01["age:tissueSVZ"],NA))
rownames(df_res_0.01) = c("MFG","STG","SVZ","THA")

ComplexHeatmap::Heatmap(as.matrix(df_res_0.01), 
                        name = "DE FDR<1%",
                        cell_fun = function(j, i, x, y, width, height, fill) {
                            grid.text(sprintf("%d", df_res_0.01[i, j]), x, y, gp = gpar(fontsize = 10))}, 
                        cluster_columns = F, 
                        cluster_rows = F, 
                        col = colorRamp2(c(min(df_res_0.01,na.rm = T),max(df_res_0.01,na.rm = T)), c("white", "#CB454A")), rect_gp = gpar(col = "white", lwd = 2) )

Show some DE genes in the interaction term

List of all interaction age-tissue genes at FDR<5% for all 6 possible comparisons

res_age_MFG_SVZ <- data.frame(topTable(fitmm_MFGref, coef='age:tissueSVZ', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "MFG_SVZ") 
res_age_MFG_THA <- data.frame(topTable(fitmm_MFGref, coef='age:tissueTHA', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "MFG_THA")
res_age_MFG_STG <- data.frame(topTable(fitmm_MFGref, coef='age:tissueSTG', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "MFG_STG")
res_age_STG_SVZ <- data.frame(topTable(fitmm_STGref, coef='age:tissueSVZ', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "STG_SVZ")
res_age_STG_THA <- data.frame(topTable(fitmm_STGref, coef='age:tissueTHA', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "STG_THA")
res_age_SVZ_THA <- data.frame(topTable(fitmm_SVZref, coef='age:tissueTHA', number=nrow(genes_counts4deg)), check.names = F) %>% rownames_to_column("ensembl") %>% mutate(comparison = "SVZ_THA")

res_age = rbind(res_age_MFG_SVZ,res_age_MFG_THA,res_age_MFG_STG,res_age_STG_SVZ,res_age_STG_THA,res_age_SVZ_THA)

## Get conversion table for Gencode 30
gencode_30 = read.table("~/pd-omics/katia/ens.geneid.gencode.v30", header = T)
colnames(gencode_30) = c("ensembl","symbol")

res_name = merge(res_age, gencode_30, by="ensembl")
res_name = res_name[order(res_name$adj.P.Val), ]

write.table(x = res_name, file = paste0(work_plots,"all_comparisons.txt"), quote = F, sep = "\t", row.names = F, col.names = T)
createDT(res_name %>% filter(adj.P.Val<0.05))

List with significant interaction at FDR 5% in any comparison (group by gene)

Plot top-5 genes (MFG vs SVZ)

Expression in log10(TPM)

metadata4deg$tissue = factor(metadata4deg$tissue, levels = c("MFG","STG","SVZ","THA"))
# After running the limma you can extract the results using the topTable function.
# Here I pick the gene name from the top DE gene
top5Gene = rownames(topTable(fitmm_MFGref, coef = "age:tissueSVZ"))[1:5]

# Now get the expression of the gene after running regressing covariates
geneExp = t(log10(genes_tpm_exp_3rd[top5Gene,]+1))
rownames(gencode_30) = gencode_30$ensembl
colnames(geneExp) = gencode_30[colnames(geneExp),"symbol"]
geneExp = as.data.frame(geneExp)

# Add the expression as another column in the metadata table (samples in both tables should be in the same order!)
meta_tmp = cbind(metadata4deg, geneExp)
meta_tmp_m = melt(meta_tmp, measure.vars = c(colnames(geneExp)))

lm_eqn <- function(df, y, x){
  formula = as.formula(sprintf('%s ~ %s', y, x))
  m <- lm(formula, data=df);
  # formating the values into a summary string to print out
  # ~ give some space, but equal size and comma need to be quoted
  eq <- substitute(italic(target) == a + b %.% italic(input)*","~~italic(r)^2~"="~r2*","~~p~"="~italic(pvalue), 
                   list(target = y,
                        input = x,
                        a = format(as.vector(coef(m)[1]), digits = 2), 
                        b = format(as.vector(coef(m)[2]), digits = 2), 
                        r2 = format(summary(m)$r.squared, digits = 3),
                        # getting the pvalue is painful
                        pvalue = format(summary(m)$coefficients[2,'Pr(>|t|)'], digits=1)
                   )
  )
  as.character(as.expression(eq));                 
}

# Plot the expression of the gene.
library(ggpmisc)
ggplot(meta_tmp_m, aes(x = age, y = value, color = tissue)) + 
  geom_point() +
  scale_color_futurama() + 
  easy_labs(x = "Age", y = expression(paste(log[10],"(TPM+1)"))) +
  geom_smooth(method='lm', alpha = .3, se = F) +
  stat_poly_eq(aes(label = paste(..rr.label..)), 
               rr.digits = 2, 
               label.x = 0.1, label.y = .98,
               formula = y ~ x, parse = TRUE, size = 4) +
  facet_grid(variable ~ tissue, scales = "free") + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  easy_remove_legend()  
ggsave(paste0(work_plots,"/top5_agerelated_TPM.pdf"), width = 10, height = 10)

Same thing but with residuals (z-scored)

# Get the residuals after adjusting for covariates
residuals = removeBatchEffect(vobjDream$E, # data after running voom
    batch = metadata4deg$tissue, # add one categorical covariate
    batch2 = paste(metadata4deg$sex,"_",metadata4deg$cause_of_death_categories), # add a second categorical covariate ( if you want )
    # here you can force the regression to NOT remove effects of some variable of interest
    #design = model.matrix(~ + age + tissue, data = metadata4deg),
    # add the column with continuous covariates
    covariates = metadata4deg %>% dplyr::select(picard_pct_mrna_bases, picard_summed_median, picard_pct_ribosomal_bases, C1 , C2 , C3 , C4))

residuals_zscore = t(scale(t(residuals)))

# Now get the expression of the gene after running regressing covariates
geneExp = t(residuals_zscore[top5Gene,]+1)
rownames(gencode_30) = gencode_30$ensembl
colnames(geneExp) = gencode_30[colnames(geneExp),"symbol"]
geneExp = as.data.frame(geneExp)

# Add the expression as another column in the metadata table (samples in both tables should be in the same order!)
meta_tmp = cbind(metadata4deg, geneExp)
meta_tmp_m = melt(meta_tmp, measure.vars = c(colnames(geneExp)))

# Plot the expression of the gene.
library(ggpmisc)
ggplot(meta_tmp_m, aes(x = age, y = value, color = tissue)) + 
  geom_point() +
  scale_color_futurama() + 
  easy_labs(x = "Age", y = expression(paste("Residual expression (Z-score)"))) +
  geom_smooth(method='lm', alpha = .3, se = F) +
  stat_poly_eq(aes(label = paste(..rr.label..)), 
               rr.digits = 2, 
               label.x = 0.1, label.y = .98,
               formula = y ~ x, parse = TRUE, size = 4) +
  facet_grid(variable ~ tissue, scales = "free") + 
  theme_bw() + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank()) +
  easy_remove_legend()  
ggsave(paste0(work_plots,"/top5_agerelated_residual.pdf"), width = 10, height = 10)